from langchain_community.vectorstores import Chroma
from langchain_openai import OpenAIEmbeddings
from langchain_core.documents import Document
from langchain_community.embeddings.dashscope import DashScopeEmbeddings
import json
import os

# 2. Load the JSON data.
# The loop further down iterates `data.items()`, so the file is expected to
# hold a JSON object mapping each job role to a list of Q&A entries.
with open("interview_questions.json", encoding="utf-8") as f:
    data = json.loads(f.read())

# 3. Convert the raw entries into a list of Document objects (required step:
#    the vector store below is built from Document instances).
# Each Q&A entry becomes one Document whose text joins the question and its
# answer; the job role the entry was listed under is kept as metadata.
documents = [
    Document(
        page_content=f"问题：{qa['question']}\n解答：{qa['description']}",
        metadata={"岗位": role},
    )
    for role, qa_list in data.items()
    for qa in qa_list
]

# 4. Initialise the vector database and add the data.
# The DashScope API key is read from the DASHSCOPE_API_KEY environment
# variable (None is passed through when the variable is unset).
embeddings = DashScopeEmbeddings(
    model="text-embedding-v1",
    dashscope_api_key=os.environ.get("DASHSCOPE_API_KEY"),
)

# Build the Chroma store from the prepared documents using the embedding
# model above, with "career_qa_db" as its persist directory.
vectordb = Chroma.from_documents(
    documents,
    embeddings,
    persist_directory="career_qa_db",
)
